# -*- coding: utf-8 -*-
import json
import scrapy
from datetime import datetime
from scrapy import Request
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest
from zc_core.spiders.base import BaseSpider
from zc_core.model.items import Box
from zc_core.client.redis_client import Redis
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.util.item_serializer import sku_serialize
from changsha.rules import parse_catalog, parse_sku_pagination


class PageSpider(BaseSpider):
    """Spider that discovers SKU list pages and queues them as Redis tasks.

    Flow, as implemented below:
      1. ``start_requests`` POSTs to ``index_url``.
      2. ``parse_index`` emits the parsed catalog as a ``Box`` item and
         requests page 1 of the SKU list for every level-3 category.
      3. ``parse_sku_page`` reads the total page count from that response and
         pushes one JSON task per page onto the Redis list
         ``task_queue_name``.
    """

    name = 'page'
    # Frequently used URLs
    index_url = 'http://hunan.gpmart.cn/shopHome/gotoShopPage.action'
    sku_list_url = 'http://hunan.gpmart.cn/frontBrands/getNewBrandsAndProductInfos.action?level=3&orderBy=normal&shopInfoId={}&productTypeId={}&brandParams=&brandId=&currentPage={}'

    def __init__(self, batchNo=None, spId=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: Batch number, forwarded to ``BaseSpider.__init__``.
            spId: Supplier id; substituted into ``sku_list_url`` and copied
                into every queued task.
        """
        super(PageSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        self.sp_id = spId
        self.redis = Redis()
        settings = get_project_settings()
        # NOTE(review): self.batch_no is presumably set by BaseSpider.__init__
        # from batchNo -- confirm against the base class.
        self.task_queue_name = '{}:task:item:{}'.format(settings.get('BOT_NAME'), self.batch_no)
        self.logger.info('供应商: spId=%s', self.sp_id)

    def start_requests(self):
        """Yield the single request for the index page that lists the catalog."""
        yield Request(
            method='POST',
            url=self.index_url,
            callback=self.parse_index,
            errback=self.error_back,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            priority=200,
            dont_filter=True
        )

    def parse_index(self, response):
        """Emit the catalog and request page 1 of each level-3 category.

        Yields:
            A ``Box('catalog', ...)`` holding all parsed categories, followed
            by one ``Request`` per category whose ``level`` equals 3.
        """
        cats = parse_catalog(response)
        if not cats:
            # Nothing parsed (None or empty): stop here instead of iterating,
            # which would raise TypeError when parse_catalog returns None.
            return

        self.logger.info('分类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in cats:
            if cat.get('level') != 3:
                continue
            # Only page 1 is requested here; parse_sku_page reads the total
            # page count from the response and queues every page as a task.
            page = 1
            cat_id = cat.get('catalogId')
            yield Request(
                method='POST',
                url=self.sku_list_url.format(self.sp_id, cat_id, page),
                callback=self.parse_sku_page,
                errback=self.error_back,
                meta={
                    'reqType': 'sku',
                    'batchNo': self.batch_no,
                    'supplierId': self.sp_id,
                    'catalogId': cat_id,
                    'page': page
                },
                priority=200,
                dont_filter=True
            )

    def parse_sku_page(self, response):
        """Queue one Redis task per SKU list page of a category.

        Reads ``supplierId`` and ``catalogId`` from ``response.meta``, obtains
        the total page count via ``parse_sku_pagination`` and, when it is
        truthy, LPUSHes a JSON payload ``{'spId', 'catId', 'page'}`` for every
        page from 1 to ``total`` onto ``self.task_queue_name``.
        """
        meta = response.meta
        sp_id = meta.get('supplierId')
        cat_id = meta.get('catalogId')

        total = parse_sku_pagination(response)
        self.logger.info('总页数: cat=%s, sp=%s, total=%s', cat_id, sp_id, total)
        if not total:
            return

        # NOTE(review): assumes parse_sku_pagination returns an int page
        # count -- confirm in changsha.rules.
        for page in range(1, total + 1):
            task = json.dumps({
                'spId': sp_id,
                'catId': cat_id,
                'page': page,
            }, default=sku_serialize)
            self.redis.client.lpush(self.task_queue_name, task)
